# Clean the environment with the following command
# NOTE(review): `rm(list = ls())` only clears the global environment (not
# loaded packages or options) and is discouraged in shared scripts; prefer
# restarting the R session. Kept here to preserve the notebook's behavior.
rm(list = ls())

This is an R Markdown Notebook for the Facies Classification Contest. In this notebook, I will try different classification methods that involve decision trees and ensemble classifiers.

This data is from the 2016 ML contest, with the focus for Facies Classification.

The provided data is a CSV file with well-log information from different wells. It contains 5 well logs and 2 indicator variables.

The goal is to train a model able to classify 9 different types of rocks, as listed in the following table:

Facies Description Label Adjacent Facies
1 Nonmarine Sandstone SS 2
2 Nonmarine coarse siltstone CSiS 1,3
3 Nonmarine fine siltstone FSiS 2
4 Marine siltstone and shale SiSh 5
5 Mudstone MS 4,6
6 Wackestone WS 5,7,8
7 Dolomite D 6,8
8 Packstone-grainstone PS 6,7,9
9 Phylloid-algal bafflestone BS 7,8

So, let’s take a look at the data!

Simple Modelling

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Read the facies well-log CSV into a data frame and preview the first rows.
data <- read.csv("../Data/facies_vectors.csv")
head(data, 10)

Now I will create the list of features and facies names for future use:

# Model inputs: the five well-log curves plus the two indicator variables.
features <- c("GR", "ILD_log10", "DeltaPHI", "PHIND", "PE", "NM_M", "RELPOS")
# The continuous well-log curves only (used for plotting and pairs plots).
well_logs <- c("GR", "ILD_log10", "DeltaPHI", "PHIND", "PE")
# Short labels for the nine facies, ordered to match facies codes 1..9.
facies_names <- c("SS", "CSiS", "FSiS", "SiSh", "MS", "WS", "D", "PS", "BS")
# One display colour per facies code (names are the 1-based facies codes).
facies_colors <- c(
  "1" = "#F4D03F", "2" = "#F5B041", "3" = "#DC7633", "4" = "#6E2C00",
  "5" = "#1B4F72", "6" = "#2E86C1", "7" = "#AED6F1", "8" = "#A569BD",
  "9" = "#196F3D"
)
# Name of the target column and of the well held out as the blind test well.
target <- "Facies"
testWell <- "SHANKLE"

# Load the project helper functions (plotwelllogs, getLegend, ...) used below.
source("Utils.R")
# Plot logs and facies track for the blind well; the return value is kept in
# `pright` — presumably a plot/legend object reused later (see ggpairs call);
# confirm against Utils.R.
pright <- plotwelllogs(datap = data, wname = testWell, dz = "Depth", wlogs = well_logs,
             facies = "Facies", fcolor = facies_colors, fnames = facies_names)

# Removing well "Recruit F9" from the data:
# First inspect its logs; the warning below shows it has incomplete curves.
plotwelllogs(datap = data, wname = "Recruit F9", dz = "Depth", wlogs = well_logs,
             facies = "Facies", fcolor = facies_colors, fnames = facies_names)
## Warning: Removed 7 rows containing missing values (geom_path).

# Drop all rows belonging to well "Recruit F9" and list the remaining wells.
is_recruit <- data$Well.Name == "Recruit F9"
dataf <- data[!is_recruit, ]
unique(dataf$Well.Name)
## [1] SHRIMPLIN       ALEXANDER D     SHANKLE         LUKE G U       
## [5] KIMZEY A        CROSS H CATTLE  NOLAN           NEWBY          
## [9] CHURCHMAN BIBLE
## 10 Levels: ALEXANDER D CHURCHMAN BIBLE CROSS H CATTLE ... SHRIMPLIN

Split data into train and test datasets by selecting the well SHANKLE as the blind well for validation:

# For the xgboost package, target must be from 0 to num_class - 1
temp <- dataf
temp$Facies = temp$Facies - 1

# Separating the "blind well"
# The blind well's rows become the validation set; all other wells train.
train = temp[temp$Well.Name != testWell, ]
test = temp[temp$Well.Name == testWell, ]
print(nrow(train))
## [1] 3620
print(nrow(test))
## [1] 449
# Sanity check: train and test together must account for every row of dataf.
print(nrow(dataf) - (nrow(train) + nrow(test)))
## [1] 0

For the gradient boosting classifier, I will use the xgboost package. Let’s load the libraries:

# Load modelling and plotting libraries.
# NOTE(review): several of these (archdata, gplots, reshape2, e1071, signal
# is used only for the median-filter section) are not obviously used in this
# notebook — confirm before pruning.
library(xgboost, quietly = TRUE, verbose = FALSE)
library(Matrix, quietly = TRUE, verbose = FALSE)
library(archdata, quietly = TRUE, verbose = FALSE) 
library(caret, quietly = TRUE, verbose = FALSE) 
library(dplyr, quietly = TRUE, verbose = FALSE)   
library(Ckmeans.1d.dp, quietly = TRUE, verbose = FALSE) 
library(e1071, quietly = TRUE, verbose = FALSE)
library(ggplot2, quietly = TRUE, verbose = FALSE)
library(gridExtra, quietly = TRUE, verbose = FALSE)
library(ggpubr, quietly = TRUE, verbose = FALSE)
library(gplots, quietly = TRUE, verbose = FALSE)
library(reshape2, quietly = TRUE, verbose = FALSE)
library(tidyverse, quietly = TRUE, verbose = FALSE)
library(GGally, quietly = TRUE, verbose = FALSE)
library(signal, quietly = TRUE, verbose = FALSE)

To use the xgboost package, we need to “convert” the training and test datasets to xgb.DMatrix:

# Converting datasets to xgb.DMatrix
# xgb.DMatrix is xgboost's native container; labels are the 0-based facies.
dtrain = xgb.DMatrix(data.matrix(train[,features]),
                     label = data.matrix(train$Facies))
dtest = xgb.DMatrix(data.matrix(test[,features]),
                    label = data.matrix(test$Facies))

# Calculating number of classes in the data
numberOfClasses <- length(unique(dataf$Facies))

# Setting up parameters
# NOTE(review): eta = 1 is a very aggressive learning rate (no shrinkage);
# `silent` is deprecated in recent xgboost versions in favour of `verbosity`
# — confirm against the installed package version.
param <- list(max_depth = 6, 
              eta = 1, 
              silent = 1, 
              nthread = 2,
              #subsample = 0.5,
              objective = "multi:softmax", 
              eval_metric = "merror", 
              "num_class" = numberOfClasses) 

# Defining training and evaluation sets
# merror on both sets is printed every boosting round.
watchlist <- list(train = dtrain, 
                  eval = dtest)

# Generating first model using Gradient Boosting Trees
model01 = xgb.train(param, 
                    dtrain,
                    nrounds = 30,
                    watchlist)
## [1]  train-merror:0.293923   eval-merror:0.616926 
## [2]  train-merror:0.231768   eval-merror:0.556793 
## [3]  train-merror:0.188398   eval-merror:0.574610 
## [4]  train-merror:0.159116   eval-merror:0.563474 
## [5]  train-merror:0.133702   eval-merror:0.556793 
## [6]  train-merror:0.106906   eval-merror:0.565702 
## [7]  train-merror:0.087017   eval-merror:0.561247 
## [8]  train-merror:0.067956   eval-merror:0.572383 
## [9]  train-merror:0.052762   eval-merror:0.561247 
## [10] train-merror:0.037293   eval-merror:0.559020 
## [11] train-merror:0.021823   eval-merror:0.561247 
## [12] train-merror:0.018232   eval-merror:0.556793 
## [13] train-merror:0.015193   eval-merror:0.554566 
## [14] train-merror:0.010221   eval-merror:0.554566 
## [15] train-merror:0.009116   eval-merror:0.552339 
## [16] train-merror:0.006630   eval-merror:0.547884 
## [17] train-merror:0.003867   eval-merror:0.545657 
## [18] train-merror:0.003591   eval-merror:0.559020 
## [19] train-merror:0.003591   eval-merror:0.559020 
## [20] train-merror:0.003039   eval-merror:0.545657 
## [21] train-merror:0.002486   eval-merror:0.545657 
## [22] train-merror:0.002486   eval-merror:0.543430 
## [23] train-merror:0.002486   eval-merror:0.541203 
## [24] train-merror:0.002486   eval-merror:0.536748 
## [25] train-merror:0.002486   eval-merror:0.534521 
## [26] train-merror:0.002486   eval-merror:0.527840 
## [27] train-merror:0.002486   eval-merror:0.534521 
## [28] train-merror:0.002486   eval-merror:0.530067 
## [29] train-merror:0.002486   eval-merror:0.527840 
## [30] train-merror:0.002486   eval-merror:0.525612
# Computing prediction estimations for the test data
# predict() with objective "multi:softmax" returns 0-based class labels.
test_pred = predict(model01, 
                    newdata = data.matrix(test[,features]))
train_pred = predict(model01, 
                     newdata = data.matrix(train[,features]))

test$Predictions = test_pred

# confusion matrix of test set
# Labels are shifted back to 1-based facies codes before tabulating.
xtab = createConfusionMatrix(test$Facies + 1, 
                             test$Predictions + 1)
xtab = t(xtab)
# NOTE(review): only the first 8 facies names are used — this assumes the
# blind well contains no class 9 (BS) samples; confirm against the data.
rownames(xtab) <- facies_names[1:8]
colnames(xtab) <- facies_names[1:8]
plotConfusionMatrix(xtab, relScale = T)

# Confusion matrix and statistical information.
# Tabulate predictions vs. true facies (shifted back to 1-based codes) over
# the union of levels that actually occur, so the table stays square even
# when some classes are absent from the blind well.
# Locals renamed: the original used `t`, which masks base::t().
obs_levels <- union(test$Predictions + 1, test$Facies + 1)
conf_tab <- table(factor(test$Predictions + 1, obs_levels),
                  factor(test$Facies + 1, obs_levels))

# caret::confusionMatrix with mode = "everything" adds per-class
# precision/recall/F1 to the overall accuracy and kappa. `CM` is reused
# later for accuracy comparisons.
CM = confusionMatrix(conf_tab,
                     mode = "everything")
print(CM)
## Confusion Matrix and Statistics
## 
##    
##      3  2  1  5  8  6  4  7
##   3 65 16  0  0  0  1  0  0
##   2 50 65 82  0  0  0  0  0
##   1  2  8  7  0  0  0  0  0
##   5  0  0  0  1  2 10  0  0
##   8  0  0  0  1 17 12  1  3
##   6  0  0  0  9 19 46  5  2
##   4  0  0  0  7  0  2  1  1
##   7  0  0  0  1  2  0  0 11
## 
## Overall Statistics
##                                           
##                Accuracy : 0.4744          
##                  95% CI : (0.4274, 0.5217)
##     No Information Rate : 0.2606          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3589          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 3 Class: 2 Class: 1 Class: 5 Class: 8 Class: 6
## Sensitivity            0.5556   0.7303  0.07865 0.052632  0.42500   0.6479
## Specificity            0.9488   0.6333  0.97222 0.972093  0.95844   0.9074
## Pos Pred Value         0.7927   0.3299  0.41176 0.076923  0.50000   0.5679
## Neg Pred Value         0.8583   0.9048  0.81019 0.958716  0.94458   0.9321
## Precision              0.7927   0.3299  0.41176 0.076923  0.50000   0.5679
## Recall                 0.5556   0.7303  0.07865 0.052632  0.42500   0.6479
## F1                     0.6533   0.4545  0.13208 0.062500  0.45946   0.6053
## Prevalence             0.2606   0.1982  0.19822 0.042316  0.08909   0.1581
## Detection Rate         0.1448   0.1448  0.01559 0.002227  0.03786   0.1024
## Detection Prevalence   0.1826   0.4388  0.03786 0.028953  0.07572   0.1804
## Balanced Accuracy      0.7522   0.6818  0.52544 0.512362  0.69172   0.7776
##                      Class: 4 Class: 7
## Sensitivity          0.142857  0.64706
## Specificity          0.977376  0.99306
## Pos Pred Value       0.090909  0.78571
## Neg Pred Value       0.986301  0.98621
## Precision            0.090909  0.78571
## Recall               0.142857  0.64706
## F1                   0.111111  0.70968
## Prevalence           0.015590  0.03786
## Detection Rate       0.002227  0.02450
## Detection Prevalence 0.024499  0.03118
## Balanced Accuracy    0.560116  0.82006
# Shift labels back to 1-based facies codes and plot true vs. predicted
# facies tracks for the blind well.
dtemp = test
dtemp$Predictions <- dtemp$Predictions + 1
dtemp$Facies <- dtemp$Facies + 1
plotwellPred(datap = dtemp, wname = testWell, dz = "Depth", wlogs = well_logs, 
             facies = "Facies", classy = "Predictions", fcolor = facies_colors,
             fnames = facies_names)

Data Imputation

Now, let’s do some analysis of the data. First, let’s evaluate if there are any missing values on each well log and indicators:

# Report the number of missing (NA) values in each feature column.
# A plain for-loop replaces the original lapply(): cat() is called purely
# for its side effect, and the old `A` binding just collected useless NULLs.
for (x in features) {
  cat("Missing values of ", x, ": ", sum(is.na(dataf[, x])), "\n", sep = "")
}
## Missing values of GR: 0
## Missing values of ILD_log10: 0
## Missing values of DeltaPHI: 0
## Missing values of PHIND: 0
## Missing values of PE: 905
## Missing values of NM_M: 0
## Missing values of RELPOS: 0

Okay, there are 905 missing values for “PE”. For now, let’s replace the missing data simply by the “PE” column average:

# Impute missing PE values with the column mean (computed over non-NA rows),
# then verify that no NA values remain.
pe_mean <- mean(dataf$PE, na.rm = TRUE)
dataf$PE[is.na(dataf$PE)] <- pe_mean

cat("Missing values of PE:", sum(is.na(dataf[, "PE"])))
## Missing values of PE: 0

Now, let’s check all the well logs and facies classification for all the wells. The function plotwelllogs can be loaded from the file Utils.R:

# Plot logs and facies for every remaining well.
# NOTE(review): if plotwelllogs returns a ggplot object without printing it,
# plots inside a for-loop will not render in knitr — confirm against Utils.R.
wellname = unique(dataf$Well.Name)
for (ii in wellname){
  plotwelllogs(datap = dataf, wname = ii, dz = "Depth", wlogs = well_logs,
               facies = "Facies", fcolor = facies_colors, fnames = facies_names)
}

# This function is just to save the legend in a variable to use later
pright <- getLegend(data, "CHURCHMAN BIBLE", "Depth", well_logs, "Facies",
                    facies_colors, facies_names)

Data imputation using regression models

The PE values for the wells Alexander D and Kimzey A are actually missing. Earlier I replaced the missing values with the average of the whole dataset, but that turned out to be a poor solution. Instead, I'll now replace the missing values with linear regression predictions based on the other available well logs. For that, we remove the "damaged" wells from the data, train a linear regression model, and make predictions on the desired wells.

# Selecting list of "good" and "damaged" wells
dwells = c("ALEXANDER D", "KIMZEY A") # "Damaged" wells (PE entirely missing)
gwells = setdiff(wellname, dwells)    # Good wells
gwells = setdiff(gwells, testWell)    # Good wells, blind well also excluded

# Creating linear model and making predictions
# PE is regressed on the four complete log curves, fit on good wells only.
lmModel <- lm(PE ~ GR + ILD_log10 + DeltaPHI + PHIND,
              data = dataf[dataf$Well.Name %in% c(gwells), ])

Let’s check the solution over the test well:

# Predict PE on the blind well and compare the predicted curve against the
# (mean-imputed) original by plotting both versions side by side.
pred = predict.lm(lmModel, dataf[dataf$Well.Name %in% c(testWell), ])
testlm = dataf[dataf$Well.Name %in% testWell, ]
testlm[ , "PE"] <- pred

# Original PE (mean-imputed earlier) for the blind well.
plotwelllogs(datap = dataf, wname = testWell, dz = "Depth", wlogs = well_logs,
               facies = "Facies", fcolor = facies_colors, fnames = facies_names)

# Linear-regression PE prediction for the same well.
plotwelllogs(datap = testlm, wname = testWell, dz = "Depth", wlogs = well_logs,
               facies = "Facies", fcolor = facies_colors, fnames = facies_names)

Retrain the linear regression with all the good wells:

# Selecting list of "good" and "damaged" wells
# Same as before, but this time the blind well is kept in the training data.
dwells = c("ALEXANDER D", "KIMZEY A") # "Damaged" wells
gwells = setdiff(wellname, dwells)    # Good wells

# Creating linear model and making predictions
lmModel <- lm(PE ~ GR + ILD_log10 + DeltaPHI + PHIND,
              data = dataf[dataf$Well.Name %in% c(gwells), ])

Predicting the PE for the wells with missing values:

# Fill the damaged wells' PE with the linear-model predictions, working on a
# copy of dataf, then confirm that no missing values remain.
datalm <- dataf
dmg <- datalm$Well.Name %in% dwells
datalm[dmg, "PE"] <- predict(lmModel, dataf[dmg, ])
cat("Missing values of PE:", sum(is.na(datalm[, "PE"])))
## Missing values of PE: 0

Let’s make the plot for both wells:

# For each damaged well: plot the lm-imputed version, then the mean-imputed
# original, so the two imputation strategies can be compared visually.
for (ii in dwells){
  plotwelllogs(datap = datalm, wname = ii, dz = "Depth", wlogs = well_logs,
               facies = "Facies", fcolor = facies_colors, fnames = facies_names)
  plotwelllogs(datap = dataf, wname = ii, dz = "Depth", wlogs = well_logs,
               facies = "Facies", fcolor = facies_colors, fnames = facies_names)
}

Okay, good. Let’s recreate the gradient boosting model with this new dataset. First, after the data imputation, split the data into test and train sets:

# xgboost expects 0-based class labels, so shift Facies down by one on a
# working copy of the lm-imputed dataset.
temp <- datalm
temp$Facies <- temp$Facies - 1

# Hold out the blind well again; everything else becomes training data.
is_blind <- temp$Well.Name == testWell
trainlm <- temp[!is_blind, ]
testlm <- temp[is_blind, ]

Run the xgboost:

# Converting datasets to xgb.DMatrix
dtrainlm = xgb.DMatrix(data.matrix(trainlm[,features]),
                       label = data.matrix(trainlm$Facies))
dtestlm = xgb.DMatrix(data.matrix(testlm[,features]),
                      label = data.matrix(testlm$Facies))

# Calculating number of classes in the data
numberOfClasses <- length(unique(dataf$Facies))

# Setting up parameters
# Compared to the first model: lower learning rate (eta 0.5 vs 1) paired
# with more boosting rounds (75 vs 30).
param <- list(max_depth = 6, 
              eta = .5, 
              silent = 1, 
              nthread = 2,
              #subsample = 0.5,
              objective = "multi:softmax", 
              eval_metric = "merror", 
              "num_class" = numberOfClasses) 

# Defining training and evaluation sets
watchlist <- list(train = dtrainlm, 
                  eval = dtestlm)

# Generating first model using Gradient Boosting Trees
modellm = xgb.train(param, 
                    dtrainlm,
                    nrounds = 75,
                    watchlist)
## [1]  train-merror:0.290608   eval-merror:0.648107 
## [2]  train-merror:0.259392   eval-merror:0.596882 
## [3]  train-merror:0.234254   eval-merror:0.583519 
## [4]  train-merror:0.220994   eval-merror:0.574610 
## [5]  train-merror:0.200829   eval-merror:0.565702 
## [6]  train-merror:0.183978   eval-merror:0.556793 
## [7]  train-merror:0.170166   eval-merror:0.543430 
## [8]  train-merror:0.157735   eval-merror:0.556793 
## [9]  train-merror:0.147790   eval-merror:0.543430 
## [10] train-merror:0.139503   eval-merror:0.536748 
## [11] train-merror:0.124586   eval-merror:0.543430 
## [12] train-merror:0.116022   eval-merror:0.543430 
## [13] train-merror:0.106077   eval-merror:0.541203 
## [14] train-merror:0.092265   eval-merror:0.543430 
## [15] train-merror:0.083978   eval-merror:0.532294 
## [16] train-merror:0.077348   eval-merror:0.534521 
## [17] train-merror:0.069337   eval-merror:0.541203 
## [18] train-merror:0.062155   eval-merror:0.536748 
## [19] train-merror:0.058011   eval-merror:0.536748 
## [20] train-merror:0.051105   eval-merror:0.545657 
## [21] train-merror:0.043094   eval-merror:0.543430 
## [22] train-merror:0.040055   eval-merror:0.527840 
## [23] train-merror:0.034807   eval-merror:0.530067 
## [24] train-merror:0.032873   eval-merror:0.530067 
## [25] train-merror:0.030387   eval-merror:0.530067 
## [26] train-merror:0.026519   eval-merror:0.532294 
## [27] train-merror:0.021271   eval-merror:0.532294 
## [28] train-merror:0.018232   eval-merror:0.532294 
## [29] train-merror:0.015746   eval-merror:0.523385 
## [30] train-merror:0.015470   eval-merror:0.523385 
## [31] train-merror:0.013812   eval-merror:0.525612 
## [32] train-merror:0.011602   eval-merror:0.532294 
## [33] train-merror:0.010497   eval-merror:0.527840 
## [34] train-merror:0.008840   eval-merror:0.525612 
## [35] train-merror:0.008011   eval-merror:0.530067 
## [36] train-merror:0.006077   eval-merror:0.516704 
## [37] train-merror:0.005249   eval-merror:0.516704 
## [38] train-merror:0.005249   eval-merror:0.512249 
## [39] train-merror:0.005249   eval-merror:0.503341 
## [40] train-merror:0.004972   eval-merror:0.505568 
## [41] train-merror:0.004696   eval-merror:0.501114 
## [42] train-merror:0.004420   eval-merror:0.516704 
## [43] train-merror:0.004144   eval-merror:0.510022 
## [44] train-merror:0.003591   eval-merror:0.501114 
## [45] train-merror:0.003039   eval-merror:0.501114 
## [46] train-merror:0.002762   eval-merror:0.501114 
## [47] train-merror:0.003039   eval-merror:0.501114 
## [48] train-merror:0.002762   eval-merror:0.507795 
## [49] train-merror:0.002762   eval-merror:0.512249 
## [50] train-merror:0.002486   eval-merror:0.512249 
## [51] train-merror:0.002486   eval-merror:0.510022 
## [52] train-merror:0.002486   eval-merror:0.512249 
## [53] train-merror:0.002486   eval-merror:0.507795 
## [54] train-merror:0.002486   eval-merror:0.507795 
## [55] train-merror:0.002486   eval-merror:0.503341 
## [56] train-merror:0.002486   eval-merror:0.503341 
## [57] train-merror:0.002486   eval-merror:0.503341 
## [58] train-merror:0.002486   eval-merror:0.505568 
## [59] train-merror:0.002486   eval-merror:0.510022 
## [60] train-merror:0.002486   eval-merror:0.507795 
## [61] train-merror:0.002486   eval-merror:0.512249 
## [62] train-merror:0.002486   eval-merror:0.516704 
## [63] train-merror:0.002486   eval-merror:0.514477 
## [64] train-merror:0.002486   eval-merror:0.505568 
## [65] train-merror:0.002486   eval-merror:0.505568 
## [66] train-merror:0.002486   eval-merror:0.507795 
## [67] train-merror:0.002486   eval-merror:0.507795 
## [68] train-merror:0.002486   eval-merror:0.507795 
## [69] train-merror:0.002486   eval-merror:0.512249 
## [70] train-merror:0.002486   eval-merror:0.505568 
## [71] train-merror:0.002486   eval-merror:0.510022 
## [72] train-merror:0.002486   eval-merror:0.510022 
## [73] train-merror:0.002486   eval-merror:0.507795 
## [74] train-merror:0.002486   eval-merror:0.503341 
## [75] train-merror:0.002486   eval-merror:0.498886
# Computing prediction estimations for the test data
testlm_pred = predict(modellm, 
                      newdata = data.matrix(testlm[,features]))
trainlm_pred = predict(modellm, 
                       newdata = data.matrix(trainlm[,features]))

testlm$Predictions = testlm_pred

# confusion matrix of test set
# Labels shifted back to 1-based codes; first 8 facies names assumed present
# in the blind well (no BS) — consistent with the earlier model.
xtablm = createConfusionMatrix(testlm$Facies + 1, 
                               testlm$Predictions + 1)
xtablm = t(xtablm)
rownames(xtablm) <- facies_names[1:8]
colnames(xtablm) <- facies_names[1:8]
plotConfusionMatrix(xtablm, relScale = T)

# Previous model's matrix, plotted again for comparison.
plotConfusionMatrix(xtab, relScale = T)

# Confusion matrix and statistical information
# NOTE(review): the local `t` masks base::t() here; later t() calls still
# resolve to the function, but renaming the variable would be safer.
u <- union(testlm$Predictions + 1, testlm$Facies + 1)
t <- table(factor(testlm$Predictions + 1, u), factor(testlm$Facies + 1, u))
CMlm = confusionMatrix(t,
                     mode = "everything")
# Compare accuracy with lm imputation against the mean-imputed baseline.
cat("Accuracy with imputed data:", CMlm$overall[1], '\n')
## Accuracy with imputed data: 0.5011136
cat("Previous accuracy:", CM$overall[1], '\n')
## Previous accuracy: 0.4743875
# Shift labels back to 1-based codes and plot truth vs. predictions.
dtemp = testlm
dtemp$Predictions <- dtemp$Predictions + 1
dtemp$Facies <- dtemp$Facies + 1
plotwellPred(datap = dtemp, wname = testWell, dz = "Depth", wlogs = well_logs, 
             facies = "Facies", classy = "Predictions", fcolor = facies_colors,
             fnames = facies_names)

The data imputation of the missing PE logs increased the process accuracy, but it is still too low. We need to work on feature augmentation. In other words, we need to use our data to create new features by applying some combination rule.

Feature Engineering

Let’s do some analysis of the data and do a pairs plot:

# Pairs plot of the five well-log curves, coloured by facies, with density
# plots on the upper triangle and histograms on the diagonal.
pm = ggpairs(datalm, 
             columns = match(c(well_logs), colnames(datalm)),
             aes(colour = factor(Facies), 
                 alpha = 0.3),
             diag = list(continuous = "barDiag"),
             upper = list(continuous = "density"),
             lower = list(continuous = "points", combo = "dot_no_facet"),
             legend = pright)

# Change color manually.
# Loop through each plot changing relevant scales.
# seq_len() replaces 1:pm$nrow — safe even if a dimension were ever 0.
for (i in seq_len(pm$nrow)) {
  for (j in seq_len(pm$ncol)) {
    pm[i, j] <- pm[i, j] +
      scale_fill_manual(values = c(facies_colors)) +
      scale_color_manual(values = c(facies_colors))
  }
}

pm

Polar coordinates

We can see some "circular" behavior in the pairs plot and data classification (facies). It is clearer when analyzing the density plots. So, to see if we can improve the facies classification, let's convert the Cartesian coordinates of the pairs to polar coordinates, using the function cart2polwells from the support file Utils.R.

# Convert the well-log pairs to polar coordinates (helper from Utils.R) and
# build the feature list, dropping the non-feature bookkeeping columns.
data_polar <- cart2polwells(datalm, well_logs)
nofeatures <- c("Facies", "Formation", "Well.Name", "Depth")
keep_col <- !(colnames(data_polar) %in% nofeatures)
features_pol <- colnames(data_polar)[keep_col]

Now, with the new coordinates, recreate the test and validation sets and then train the model:

# For the xgboost package, target must be from 0 to num_class - 1
temp <- data_polar
temp$Facies = temp$Facies - 1

# Splitting data
trainpol = temp[temp$Well.Name != testWell, ]
testpol = temp[temp$Well.Name == testWell, ]

# Converting datasets to xgb.DMatrix
dtrainpol = xgb.DMatrix(data.matrix(trainpol[,features_pol]),
                        label = data.matrix(trainpol$Facies))
dtestpol = xgb.DMatrix(data.matrix(testpol[,features_pol]), 
                       label = data.matrix(testpol$Facies))

# NOTE(review): mdepth is computed but never used — max_depth below is
# hard-coded to 5. Either wire mdepth into param or drop this line.
mdepth = round(length(features_pol)/2)

# Setting up parameters
param <- list(max_depth = 5, 
              eta = .3, 
              silent = 1, 
              nthread = 10,
              objective = "multi:softmax", 
              eval_metric = "merror", 
              "num_class" = numberOfClasses) 

# Defining training and evaluation sets
watchlist <- list(train = dtrainpol,
                  eval = dtestpol)

# Generating first model using Gradient Boosting Trees
modelpol = xgb.train(param,
                     dtrainpol,
                     nrounds = 47,
                     watchlist)
## [1]  train-merror:0.301105   eval-merror:0.563474 
## [2]  train-merror:0.276796   eval-merror:0.534521 
## [3]  train-merror:0.252486   eval-merror:0.534521 
## [4]  train-merror:0.240884   eval-merror:0.523385 
## [5]  train-merror:0.233425   eval-merror:0.516704 
## [6]  train-merror:0.220442   eval-merror:0.514477 
## [7]  train-merror:0.216575   eval-merror:0.534521 
## [8]  train-merror:0.204696   eval-merror:0.518931 
## [9]  train-merror:0.195304   eval-merror:0.512249 
## [10] train-merror:0.187845   eval-merror:0.507795 
## [11] train-merror:0.181492   eval-merror:0.516704 
## [12] train-merror:0.171547   eval-merror:0.510022 
## [13] train-merror:0.164917   eval-merror:0.505568 
## [14] train-merror:0.156630   eval-merror:0.501114 
## [15] train-merror:0.152210   eval-merror:0.505568 
## [16] train-merror:0.148066   eval-merror:0.505568 
## [17] train-merror:0.145028   eval-merror:0.510022 
## [18] train-merror:0.138122   eval-merror:0.501114 
## [19] train-merror:0.130111   eval-merror:0.516704 
## [20] train-merror:0.124862   eval-merror:0.512249 
## [21] train-merror:0.120718   eval-merror:0.512249 
## [22] train-merror:0.116575   eval-merror:0.518931 
## [23] train-merror:0.113812   eval-merror:0.516704 
## [24] train-merror:0.107459   eval-merror:0.516704 
## [25] train-merror:0.099448   eval-merror:0.518931 
## [26] train-merror:0.093923   eval-merror:0.516704 
## [27] train-merror:0.087293   eval-merror:0.516704 
## [28] train-merror:0.083149   eval-merror:0.514477 
## [29] train-merror:0.076519   eval-merror:0.507795 
## [30] train-merror:0.072099   eval-merror:0.512249 
## [31] train-merror:0.069890   eval-merror:0.514477 
## [32] train-merror:0.067956   eval-merror:0.514477 
## [33] train-merror:0.064641   eval-merror:0.510022 
## [34] train-merror:0.058564   eval-merror:0.507795 
## [35] train-merror:0.056354   eval-merror:0.512249 
## [36] train-merror:0.052762   eval-merror:0.512249 
## [37] train-merror:0.049171   eval-merror:0.503341 
## [38] train-merror:0.046961   eval-merror:0.496659 
## [39] train-merror:0.043370   eval-merror:0.501114 
## [40] train-merror:0.039779   eval-merror:0.496659 
## [41] train-merror:0.036740   eval-merror:0.503341 
## [42] train-merror:0.030663   eval-merror:0.503341 
## [43] train-merror:0.027072   eval-merror:0.494432 
## [44] train-merror:0.024586   eval-merror:0.492205 
## [45] train-merror:0.022099   eval-merror:0.492205 
## [46] train-merror:0.020442   eval-merror:0.489978 
## [47] train-merror:0.019061   eval-merror:0.487751

Plot confusion matrix and evaluate accuracy:

# Computing prediction estimations for the test data
testpol_pred <- predict(modelpol,
                        newdata = data.matrix(testpol[, features_pol]))
trainpol_pred <- predict(modelpol,
                         newdata = data.matrix(trainpol[, features_pol]))

testpol$Predictions <- testpol_pred

# Confusion matrix of the test set (labels shifted back to 1-based codes).
xtabpol <- createConfusionMatrix(testpol$Facies + 1,
                                 testpol$Predictions + 1)
xtabpol <- t(xtabpol)
# BUG FIX: the column labels previously used all nine facies names, which is
# inconsistent with the eight row labels and with every sibling block (the
# blind well has only eight classes — confirm createConfusionMatrix output).
rownames(xtabpol) <- facies_names[1:8]
colnames(xtabpol) <- facies_names[1:8]
# Previous (lm-imputed) matrix first for comparison, then the polar one.
plotConfusionMatrix(xtablm, relScale = TRUE)

plotConfusionMatrix(xtabpol, relScale = TRUE)

# Confusion matrix and statistical information
u <- union(testpol$Predictions + 1, testpol$Facies + 1)
t <- table(factor(testpol$Predictions + 1, u), factor(testpol$Facies + 1, u))

# caret::confusionMatrix summary for the polar-coordinate model.
CMpol = confusionMatrix(t,
                        mode = "everything")
# Accuracy comparison across the three models so far.
cat("Accuracy (polar):   ",CMpol$overall[1], "\n")
## Accuracy (polar):    0.5122494
cat("Accuracy (LM):      ",CMlm$overall[1], "\n")
## Accuracy (LM):       0.5011136
cat("Accuracy (original):",CM$overall[1], "\n")
## Accuracy (original): 0.4743875
# Shift labels back to 1-based codes and plot truth vs. predictions.
dtemp = testpol
dtemp$Predictions <- dtemp$Predictions + 1
dtemp$Facies <- dtemp$Facies + 1
plotwellPred(datap = dtemp, wname = testWell, dz = "Depth", wlogs = well_logs, 
             facies = "Facies", classy = "Predictions", fcolor = facies_colors,
             fnames = facies_names)

Gradient of the features

With the data imputation and the polar-coordinate transformation, we could increase the accuracy of the model by around 3.5 percentage points. Now, other competitors used the feature gradients as new features. Let's do the same by using the function features_gradient from the file Utils.R:

# Augment the polar dataset with per-well gradients of every feature
# (helper from Utils.R), then rebuild the feature list without the
# bookkeeping columns.
data_grad <- features_gradient(data_polar, features_pol, wellname)
grad_cols <- colnames(data_grad)
features_grad <- grad_cols[!(grad_cols %in% nofeatures)]

Now, new model training:

# For the xgboost package, target must be from 0 to num_class - 1
temp <- data_grad
temp$Facies = temp$Facies - 1

# Splitting data
traingrad = temp[temp$Well.Name != testWell, ]
testgrad = temp[temp$Well.Name == testWell, ]

# Converting datasets to xgb.DMatrix
dtraingrad = xgb.DMatrix(data.matrix(traingrad[,features_grad]),
                         label = data.matrix(traingrad$Facies))
dtestgrad = xgb.DMatrix(data.matrix(testgrad[,features_grad]),
                        label = data.matrix(testgrad$Facies))
# NOTE(review): mdepth is computed but unused (max_depth is hard-coded to 7).
mdepth = round(length(features_grad)/2)

# Setting up parameters
param <- list(max_depth = 7, 
              eta = .3, 
              silent = 1, 
              nthread = 2,
              objective = "multi:softmax", 
              eval_metric = "merror", 
              "num_class" = numberOfClasses) 

# Defining training and evaluation sets
watchlist <- list(train = dtraingrad,
                  eval = dtestgrad)

# Generating first model using Gradient Boosting Trees
# Only 11 rounds — the richer feature set converges much faster.
modelgrad = xgb.train(param,
                      dtraingrad,
                      nrounds = 11,
                      watchlist)
## [1]  train-merror:0.203591   eval-merror:0.516704 
## [2]  train-merror:0.158840   eval-merror:0.487751 
## [3]  train-merror:0.138122   eval-merror:0.478842 
## [4]  train-merror:0.118508   eval-merror:0.469933 
## [5]  train-merror:0.105525   eval-merror:0.485523 
## [6]  train-merror:0.090884   eval-merror:0.472160 
## [7]  train-merror:0.080939   eval-merror:0.472160 
## [8]  train-merror:0.067956   eval-merror:0.465479 
## [9]  train-merror:0.055525   eval-merror:0.465479 
## [10] train-merror:0.048619   eval-merror:0.454343 
## [11] train-merror:0.041160   eval-merror:0.445434
# Computing prediction estimations for the test data
testgrad_pred = predict(modelgrad, 
                        newdata = data.matrix(testgrad[,features_grad]))
traingrad_pred = predict(modelgrad, 
                         newdata = data.matrix(traingrad[,features_grad]))

testgrad$Predictions = testgrad_pred

# confusion matrix of test set
# Labels shifted back to 1-based codes; first 8 facies names assumed present
# in the blind well, consistent with the earlier models.
xtabgrad = createConfusionMatrix(testgrad$Facies + 1, 
                                 testgrad$Predictions + 1)
xtabgrad = t(xtabgrad)
rownames(xtabgrad) <- facies_names[1:8]
colnames(xtabgrad) <- facies_names[1:8]
plotConfusionMatrix(xtabgrad, relScale = T)

# Previous (polar) model's matrix, plotted again for comparison.
plotConfusionMatrix(xtabpol, relScale = T)

# Confusion matrix and statistical information
u <- union(testgrad$Predictions + 1, testgrad$Facies + 1)
t <- table(factor(testgrad$Predictions + 1, u), factor(testgrad$Facies + 1, u))
CMgrad = confusionMatrix(t,
                         mode = "everything")
# Accuracy comparison across all four models so far.
cat("Accuracy (gradient):",CMgrad$overall[1], "\n")
## Accuracy (gradient): 0.5545657
cat("Accuracy (polar):   ",CMpol$overall[1], "\n")
## Accuracy (polar):    0.5122494
cat("Accuracy (LM):      ",CMlm$overall[1], "\n")
## Accuracy (LM):       0.5011136
cat("Accuracy (original):",CM$overall[1], "\n")
## Accuracy (original): 0.4743875
# Shift labels back to 1-based codes and plot truth vs. predictions.
dtemp = testgrad
dtemp$Predictions <- dtemp$Predictions + 1
dtemp$Facies <- dtemp$Facies + 1
plotwellPred(datap = dtemp, wname = testWell, dz = "Depth", wlogs = well_logs, 
             facies = "Facies", classy = "Predictions", fcolor = facies_colors,
             fnames = facies_names)

Creating the gradient over all the features proved to be an excellent decision, and the accuracy increased. To deliver the final facies classification predictions, we assume that an isolated facies inside a "block" is rare (though not impossible). So, we will "fix" this by applying a median filter.

Correcting the predictions

From the package signal, we will use the function medfilt1 to filter isolated classified facies in an area, by replacing it with the median of the window.

# Smooth the predicted labels with a running median (window of 5 samples),
# removing isolated single-sample facies inside otherwise uniform blocks.
# Note: testgrad2 contains only the blind well, so the other wells in the
# loop contribute zero rows and the calls are no-ops.
testgrad2 <- testgrad
testgrad2$Classy <- testgrad_pred
for (w in wellname) {
  rows <- which(testgrad2$Well.Name %in% w)
  # endrule = "keep" leaves the first/last two samples unfiltered.
  testgrad2[rows, "Classy"] <-
    runmed(testgrad2[rows, "Classy"], k = 5, endrule = "keep")
}
# (A dead `u <- union(...)` assignment was removed here; it was always
# recomputed before use in the next chunk.)

# confusion matrix of test set
# Compare the filtered predictions (Classy) against the true facies.
xtabgrad2 = createConfusionMatrix(testgrad2$Facies + 1, 
                                  testgrad2$Classy + 1)
xtabgrad2 = t(xtabgrad2)
rownames(xtabgrad2) <- facies_names[1:8]
colnames(xtabgrad2) <- facies_names[1:8]
# Unfiltered matrix first, then the median-filtered one, for comparison.
plotConfusionMatrix(xtabgrad, relScale = T)

plotConfusionMatrix(xtabgrad2, relScale = T)

u <- union(testgrad2$Classy + 1, testgrad2$Facies + 1)
t <- table(factor(testgrad2$Classy + 1, u), factor(testgrad2$Facies + 1, u))

# Confusion matrix and statistical information
CMgrad2 = confusionMatrix(t,
                          mode = "everything")
cat("Accuracy (gradient):",CMgrad2$overall[1], "\n")
## Accuracy (gradient): 0.5701559

Plotting the final predictions:

# Shift labels back to 1-based facies codes and plot the true facies next to
# the median-filtered predictions for the blind well.
plot_df <- testgrad2
plot_df$Classy <- plot_df$Classy + 1
plot_df$Facies <- plot_df$Facies + 1
plotwellPred(datap = plot_df, wname = testWell, dz = "Depth", wlogs = well_logs,
             facies = "Facies", classy = "Classy", fcolor = facies_colors,
             fnames = facies_names)

Checking feature importance:

# Rank the engineered features by xgboost gain and plot the ten strongest,
# scaled relative to the top feature.
imp_table <- xgb.importance(feature_names = features_grad, model = modelgrad)
xgb.ggplot.importance(imp_table, top_n = 10, measure = "Gain",
                      rel_to_first = TRUE, n_clusters = 5)

The choice of the package xgboost led us to a very accurate facies classification, even considering the low amount of data to train the gradient boosting trees models.

Clustering

Now, let’s try to run a clustering algorithm to create new features. Clustering algorithms, for this purpose, are considered "unsupervised learning", as they don't require training with the correct answers. These algorithms simply find similarities between the data inputs and separate them into clusters.

The next cell will replace missing values in the data with 0 and then use kmeans to create 6 clusters in the data.

# Fix the RNG seed so the kmeans result is reproducible.
set.seed(101)

# Features not used for the clustering
no_clust = c("RELPOS", "NM_M", "RELPOS_grad", "NM_M_grad")
features_cluster = features_grad[! features_grad %in% no_clust]

# Replacing missing values by 0
# (gradient columns may contain NAs at well boundaries — confirm in Utils.R)
for (x in features_cluster) {
  data_grad[is.na(data_grad[,x]), x] <- 0
}

#features_cluster = features_grad[features_grad != no_clust]
# NOTE(review): 6 clusters are created here although the surrounding text
# mentions 5 — confirm the intended cluster count.
Cluster = kmeans(as.matrix(data_grad[,features_cluster]), 6)
data_grad$cluster = as.factor(Cluster$cluster)
head(data_grad)

Training the classifier:

# For the xgboost package, target must be from 0 to num_class - 1
temp <- data_grad
temp$Facies = temp$Facies - 1

# Splitting data: hold out the test well, train on all the others
trainclust = temp[temp$Well.Name != testWell, ]
testclust = temp[temp$Well.Name == testWell, ]

# New set of features: gradient features plus the k-means cluster id
features_clust = c(features_grad,"cluster")

# Converting datasets to xgb.DMatrix (data.matrix coerces the factor
# `cluster` column to its numeric codes)
dtrainclust = xgb.DMatrix(data.matrix(trainclust[,features_clust]),
                          label = data.matrix(trainclust$Facies))
dtestclust = xgb.DMatrix(data.matrix(testclust[,features_clust]),
                         label = data.matrix(testclust$Facies))
# NOTE(review): mdepth is computed but never used — `param` hard-codes
# max_depth = 8. Confirm whether mdepth was meant to feed max_depth.
mdepth = round(length(features_clust)/2)

# Setting up parameters
# NOTE(review): `silent` is deprecated in recent xgboost releases in favour
# of `verbosity` — confirm against the installed package version.
param <- list(max_depth = 8, 
              eta = .3, 
              silent = 1, 
              nthread = 2,
              objective = "multi:softmax", 
              eval_metric = "merror", 
              "num_class" = numberOfClasses) 

# Defining training and evaluation sets (eval errors are printed per round)
watchlist <- list(train = dtrainclust,
                  eval = dtestclust)

# Training the Gradient Boosting Trees model with the cluster feature
modelclust = xgb.train(param,
                       dtrainclust,
                       nrounds = 27,
                       watchlist)
## [1]  train-merror:0.172928   eval-merror:0.525612 
## [2]  train-merror:0.129834   eval-merror:0.492205 
## [3]  train-merror:0.099171   eval-merror:0.487751 
## [4]  train-merror:0.079558   eval-merror:0.469933 
## [5]  train-merror:0.066575   eval-merror:0.487751 
## [6]  train-merror:0.056630   eval-merror:0.485523 
## [7]  train-merror:0.041713   eval-merror:0.485523 
## [8]  train-merror:0.031492   eval-merror:0.483296 
## [9]  train-merror:0.022928   eval-merror:0.481069 
## [10] train-merror:0.017956   eval-merror:0.474388 
## [11] train-merror:0.014088   eval-merror:0.472160 
## [12] train-merror:0.009945   eval-merror:0.465479 
## [13] train-merror:0.008287   eval-merror:0.463252 
## [14] train-merror:0.005801   eval-merror:0.463252 
## [15] train-merror:0.004144   eval-merror:0.463252 
## [16] train-merror:0.002762   eval-merror:0.463252 
## [17] train-merror:0.002486   eval-merror:0.469933 
## [18] train-merror:0.001934   eval-merror:0.465479 
## [19] train-merror:0.001657   eval-merror:0.465479 
## [20] train-merror:0.001657   eval-merror:0.463252 
## [21] train-merror:0.001657   eval-merror:0.461024 
## [22] train-merror:0.001657   eval-merror:0.456570 
## [23] train-merror:0.001657   eval-merror:0.454343 
## [24] train-merror:0.001657   eval-merror:0.452116 
## [25] train-merror:0.001381   eval-merror:0.452116 
## [26] train-merror:0.001105   eval-merror:0.454343 
## [27] train-merror:0.001105   eval-merror:0.456570
# Predict 0-based facies labels for the held-out and training wells
testclust_pred = predict(modelclust,
                         newdata = data.matrix(testclust[,features_clust]))
trainclust_pred = predict(modelclust,
                          newdata = data.matrix(trainclust[,features_clust]))

testclust$Predictions = testclust_pred

# Confusion matrix of the test set (shifted back to 1-based facies codes)
xtabclust = createConfusionMatrix(testclust$Facies + 1,
                                  testclust$Predictions + 1)
xtabclust = t(xtabclust)
# BUG FIX: the facies labels were previously assigned to the stale matrix
# `xtabgrad` (already labelled in an earlier chunk) instead of the new
# `xtabclust`, so the cluster-model confusion matrix was plotted without
# facies names. Label the new matrix, as done for xtabgrad2/xtabclust2.
rownames(xtabclust) <- facies_names[1:8]
colnames(xtabclust) <- facies_names[1:8]
# Previous (gradient) model first, for side-by-side comparison
plotConfusionMatrix(xtabgrad, relScale = T)

plotConfusionMatrix(xtabclust, relScale = T)

# Confusion matrix and statistical information.
# Align factor levels of predictions and truth so the table is square;
# the table is named `conf_tab` rather than `t` to avoid masking base::t(),
# which is still used as the transpose function elsewhere in this notebook.
u <- union(testclust$Predictions + 1, testclust$Facies + 1)
conf_tab <- table(factor(testclust$Predictions + 1, u),
                  factor(testclust$Facies + 1, u))
CMclust = confusionMatrix(conf_tab,
                          mode = "everything")
# Accuracy comparison across all models tried so far
cat("Accuracy (cluster): ",CMclust$overall[1], "\n")
## Accuracy (cluster):  0.5434298
cat("Accuracy (gradient):",CMgrad2$overall[1], "\n")
## Accuracy (gradient): 0.5701559
cat("Accuracy (polar):   ",CMpol$overall[1], "\n")
## Accuracy (polar):    0.5122494
cat("Accuracy (LM):      ",CMlm$overall[1], "\n")
## Accuracy (LM):       0.5011136
cat("Accuracy (original):",CM$overall[1], "\n")
## Accuracy (original): 0.4743875
# Shift labels back to 1..9 facies codes on a throwaway copy and plot
dtemp = testclust
dtemp$Predictions <- dtemp$Predictions + 1
dtemp$Facies <- dtemp$Facies + 1
plotwellPred(datap = dtemp, wname = testWell, dz = "Depth", wlogs = well_logs,
             facies = "Facies", classy = "Predictions", fcolor = facies_colors,
             fnames = facies_names)

# Post-process the cluster-model predictions with a per-well running median
# (window k = 5), smoothing out isolated single-sample facies switches.
testclust2 = testclust
testclust2$Classy = testclust_pred
for (w in wellname){
  # Hoist the row lookup so it is computed once per well
  idx = which(testclust2$Well.Name %in% w)
  testclust2[idx, "Classy"] = runmed(testclust2[idx, "Classy"],
                                     k = 5, endrule = "keep")
}
# (A dead `u = union(...)` was removed here: it lacked the +1 label shift
# and was recomputed correctly below before its first use.)

# Confusion matrix of the smoothed test-set predictions (1-based codes)
xtabclust2 = createConfusionMatrix(testclust2$Facies + 1,
                                   testclust2$Classy + 1)
xtabclust2 = t(xtabclust2)
rownames(xtabclust2) <- facies_names[1:8]
colnames(xtabclust2) <- facies_names[1:8]
# Unsmoothed matrix first, for comparison
plotConfusionMatrix(xtabclust, relScale = T)

plotConfusionMatrix(xtabclust2, relScale = T)

# Align factor levels so the contingency table is square; use `conf_tab`
# instead of `t` to avoid masking base::t().
u <- union(testclust2$Classy + 1, testclust2$Facies + 1)
conf_tab <- table(factor(testclust2$Classy + 1, u),
                  factor(testclust2$Facies + 1, u))

# Confusion matrix and statistical information for the smoothed predictions
CMclust2 = confusionMatrix(conf_tab,
                          mode = "everything")
# Accuracy comparison across all models tried so far
cat("Accuracy (cluster): ",CMclust2$overall[1], "\n")
## Accuracy (cluster):  0.5790646
cat("Accuracy (gradient):",CMgrad2$overall[1], "\n")
## Accuracy (gradient): 0.5701559
cat("Accuracy (polar):   ",CMpol$overall[1], "\n")
## Accuracy (polar):    0.5122494
cat("Accuracy (LM):      ",CMlm$overall[1], "\n")
## Accuracy (LM):       0.5011136
cat("Accuracy (original):",CM$overall[1], "\n")
## Accuracy (original): 0.4743875
# Shift labels back to 1..9 facies codes on a throwaway copy and plot
dtemp = testclust2
dtemp$Classy <- dtemp$Classy + 1
dtemp$Facies <- dtemp$Facies + 1
plotwellPred(datap = dtemp, wname = testWell, dz = "Depth", wlogs = well_logs,
             facies = "Facies", classy = "Classy", fcolor = facies_colors,
             fnames = facies_names)

# Feature importance (Gain) of the final cluster-augmented model
importance <- xgb.importance(feature_names = features_clust, model = modelclust)
xgb.ggplot.importance(importance, top_n = 10, measure = "Gain", rel_to_first = T, n_clusters = 5)